/*==================================================
Project:       Targeting Social Programs
Authors:       Diether W. Beuermann
               Bridget Hoffmann        
               Marco Stampini 
               David L. Vargas
               Diego Vera-Cossio
----------------------------------------------------
Creation Date:    Sep 2024
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/

/*==================================================
            0: Program set up
==================================================*/
*Written on STATA 17
drop _all

** source dir
cd "${dir5r}" // graph dir

/*==================================================
            1: load and transformations
==================================================*/

*----------- 1.0.1 Load CRRA welfare FUN
qui do "${dir6r}/ados/bstrap_pmt.ado" // load bstap FUN
qui do "${dir6r}/ados/bstrap_pmt_mh.ado" // load bstap FUN
qui do "${dir6r}/ados/baseline_pmt.ado" // load baseline FUN
qui do "${dir6r}/ados/expanded_pmt.ado" // load expanded FUN

*----------  1.1. Data prep:
use "${dir3r}/01_survey/survey_targeting_r1_long.dta", clear

* new interactions 
cap drop d_loss_illness_inc
cap drop d_reco_ilness_inc
gen d_loss_illness_inc = illness_incidence * d_loss_ilness
gen d_reco_ilness_inc = lag_illness_incidence * d_reco_ilness
lab var d_loss_illness_inc	"Loss: Illness Incidence"
lab var d_reco_ilness_inc	"Recovery: Illness Incidence"


cap drop d_loss_cut_remittance_inc
cap drop d_reco_cut_remittance_inc
gen d_loss_cut_remittance_inc = cut_remittance_incidence * d_loss_cut_remittance
gen d_reco_cut_remittance_inc = lag_cut_remittance_incidence * d_reco_cut_remittance
lab var d_loss_cut_remittance_inc "Loss: Remittances Cut Incidence"
lab var d_reco_cut_remittance_inc "Recovery: Remittances Cut Incidence"

* Globals with shocks
glo loss d_loss_ilness d_loss_cut_remittance death divorce bankrupt nat_shock
glo loss_rec      d_loss_ilness d_reco_ilness d_loss_cut_remittance d_reco_cut_remittance death divorce bankrupt nat_shock 
glo loss_inc      illness_incidence cut_remittance_incidence death divorce bankrupt nat_shock 
glo loss_rec_inc  d_loss_illness_inc d_reco_ilness_inc d_loss_cut_remittance_inc d_reco_cut_remittance_inc death divorce bankrupt nat_shock 




*Globals with updted survey variables
glo hh_char_srv urban age_feb20_srv prop_kids_srv n_members_srv elementary_srv highschool_srv tertiary_edu_srv children_srv living_couple
glo assets_srv own_washing_machine_srv own_tractor_srv own_moto_srv own_car_srv own_PC_srv own_fridge_srv
glo hh_srv  wall_finished_srv floor_finished_srv cookpower_connected_srv wc_flush_srv kitchen_srv electricity_srv running_water_srv trash_service_srv 
glo labour_srv formal_work_srv informal_work_srv

// Including labour status from SISBEN
glo pmt_1_srv  $hh_char_srv $assets_srv $hh_srv $labour_srv
** make sure all labels are OK 
lab var illness_incidence 			"Illness Incidence"
lab var cut_remittance_incidence 	"Remittances Cut Incidence"
lab var death						"Death"
lab var divorce					"Separation of Spouses"
lab var bankrupt					"Bankruptcy or Business Closure"
lab var nat_shock					"Natural Disaster or Fire"
lab var job_loss                   "Loss: Job lost at any point of the year"
lab var job_loss_formal            "Loss: Job lost formal at any point of the year"
lab var job_loss_informal          "Loss: Job lost informal at any point of the year"

*----------  1.1.2 Estimate PMT for survey data:

/*==================================================
            2: Create prediction 
==================================================*/

// create a variable with sector 
global sector_vars sector_a sector_b sector_c sector_d sector_e sector_f sector_g sector_h sector_i sector_j sector_k sector_l sector_m sector_n sector_o sector_p sector_q sector_r sector_s sector_t

// new sector variable
gen sector_cat = .

loc j = 0
foreach v of global sector_vars {
    loc ++j
    replace sector_cat = `j' if `v' == 1
    local sector_cat_l`j' : var label `v'
    local sector_cat_lab `" `sector_cat_lab' `j' "`sector_cat_l`j''" "'
}

label define sector_cat_l `sector_cat_lab'
label values sector_cat sector_cat_l

replace sector_cat = . if work_any_srv == . 


// sector in 2019 
gen aux1 = sector_cat if year == 2019
bys id_hogar_SISBEN: egen sector_cat_2019 = max(aux1)
drop aux1

label values sector_cat sector_cat_l

// depto urban-rural bin 
egen bin = group(urb_rur cod_dpto), missing

gen aux1 = bin if year == 2019
bys id_hogar_SISBEN: egen bin_2019 = max(aux1)
drop aux1

// predict employment by depto urban-rural bin 
preserve 

keep year formal_work_srv informal_work_srv work_any_srv pondera cod_dpto sector_cat urb_rur cod_dpto bin

collapse (sum) work_any_srv formal_work_srv informal_work_srv [aw=pondera] , by(year urb_rur cod_dpto bin) // uncomment for sector

sort cod_dpto urb_rur year

foreach v in work_any_srv formal_work_srv informal_work_srv  {
    gen aux1 = `v' if year == 2019
    bys bin: egen `v'_2019 = max(aux1) 
    replace `v' = `v' / `v'_2019 if `v'_2019 != 0
    replace `v' = 0 if `v'_2019 != 0 & `v' == 0
    gen flag_`v' = `v'_2019 == 0
    qui sum flag_`v'
    if r(sd) == 0  drop flag_`v'
    drop aux1 
}
/* the sector that have zero on 2019 consistentely are 0 on 2020 and 2021 
    Probably just sectors that are fully formal or informal.
    change is set to zero to correct the missings due to division by zero
    No issue  */

// year sector cat code dept 
tsset bin year // uncomment for sector

gen d_pred_work_any_srv= d.work_any_srv
gen d_pred_formal_work_srv= d.formal_work_srv
gen d_pred_informal_work_srv= d.informal_work_srv

keep year urb_rur cod_dpto bin year d_pred_work_any_srv d_pred_formal_work_srv d_pred_informal_work_srv // uncomment for sector
rename bin bin_2019  // rename for later merge


// split prediction to allow heterogeneity 

** any work
gen d_pred_gain_work_any_srv = d_pred_work_any_srv if d_pred_work_any_srv > 0 & !missing(d_pred_work_any_srv)
replace d_pred_gain_work_any_srv = 0 if missing(d_pred_gain_work_any_srv) & !missing(d_pred_work_any_srv)

gen d_pred_loss_work_any_srv = d_pred_work_any_srv * -1 if  d_pred_work_any_srv < 0 & !missing(d_pred_work_any_srv)
replace d_pred_loss_work_any_srv = 0 if missing(d_pred_loss_work_any_srv) & !missing(d_pred_work_any_srv)

* formal work
gen d_pred_gain_work_formal_srv = d_pred_formal_work_srv if d_pred_formal_work_srv > 0 & !missing(d_pred_formal_work_srv)
replace d_pred_gain_work_formal_srv = 0 if missing(d_pred_gain_work_formal_srv) & !missing(d_pred_formal_work_srv)

gen d_pred_loss_work_formal_srv = d_pred_formal_work_srv * -1 if  d_pred_formal_work_srv < 0 & !missing(d_pred_formal_work_srv)
replace d_pred_loss_work_formal_srv = 0 if missing(d_pred_loss_work_formal_srv) & !missing(d_pred_formal_work_srv)


tempfile labour_pred
save `labour_pred', replace

restore 

// add prediction to the data
merge m:1 bin_2019 year using `labour_pred', gen(_m_jobpred)

/*
 Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                            12,147  (_m_jobpred==3)
    -----------------------------------------
*/

/*==================================================
            3: Estimation
==================================================*/
 
//============== Dynamic estimation 


 // -------- Any work
local nreps=1000

//define poverty line in terms of Colombian Pesitos
local base=1.90*1515.1
// 2019 PPP factor https://data.worldbank.org/indicator/PA.NUS.PRVT.PP?locations=CO

*** Gain and losses // Main specification!
bstrap_pmt,  incvar(d_income_nt) variables(d_gain_work_any_srv d_loss_work_any_srv) reps(`nreps')   base(`base') pmtvars(l_pp_inc_srv $pmt_1_srv) predvars($pmt_1_srv) altpredvar(d_pred_gain_work_any_srv d_pred_loss_work_any_srv)
mat estmat0 = r(estt)

/*==================================================
            3: Consolidate data
==================================================*/

** send to stata
cap frame create points
frame change points 

** import estiamtes matrix as a dataframe
drop _all 
svmat estmat0, names(col)
gen type_m = "job_pred_rob"
tempfile dfdyn
save `dfdyn', replace

cap drop base _merge 

foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok zero epov {
    replace `v' = `v' * 100
	replace `v'_uci=`v'_uci*100
	replace `v'_lci=`v'_lci*100
}
 // PPP values 

 gen hh_benefits_lcu = hh_benefits
 replace hh_benefits = hh_benefits / 1515.1 
 replace hh_benefits_uci= hh_benefits_uci/1515.1
 replace hh_benefits_lci= hh_benefits_lci/1515.1
 // 2019 PPP factor https://data.worldbank.org/indicator/PA.NUS.PRVT.PP?locations=CO

// add previous results
append using "${dir3r}/03_auxiliar/Boostrap_estimates.dta"

foreach v in covered hh_benefits inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u zero epov covered_uci covered_lci inclusion_err_uci inclusion_err_lci inclusion_ok_uci inclusion_ok_lci exclusion_err_uci exclusion_err_lci exclusion_ok_uci exclusion_ok_lci crra_uci crra_lci crra_l_uci crra_l_lci crra_u_uci crra_u_lci hh_benefits_uci hh_benefits_lci budget_nat_uci budget_nat_lci zero_uci zero_lci epov_uci epov_lci {
    sum `v' if year == 2019 & model_n == 1
    replace `v' = r(mean) if year == 2019 & model_n != 1 
}

// store data for later use 
*save "${dir3r}/03_auxiliar/Boostrap_estimates_MHrob_inf.dta" , replace

/*==================================================
            4. Make graphs 
 ==================================================*/


*** Gain and losses
#delimit ;
tw (connected exclusion_err year if type_m ==  "Baseline", 
    mcolor(red%80) msym(S) lcolor(red%80) )
    (rcap exclusion_err_uci exclusion_err_lci year if type_m ==  "Baseline", 
    lcolor(red%80) )
    (connected exclusion_err year if type_m ==  "job_pred_rob", 
    lcolor( ltblue)  mcolor( ltblue) msym(0) lpattern(-))
    (rcap exclusion_err_uci exclusion_err_lci year if type_m ==  "job_pred_rob", 
    lcolor( ltblue) lpattern(-))
    (connected exclusion_err year if model_n ==  6 & type_m ==  "OLS" , 
    mcolor(black%80) msym(D) lcolor(black%80) ) 
    (rcap exclusion_err_uci exclusion_err_lci year if model_n ==  6 & type_m ==  "OLS", 
    lcolor(black%80) )	
    ,
    legend(order(1 "Benchmark PMT"		 			
            3 "Dynamic (Region urban/rural work trends)"  					
            5 "Dynamic"	) pos(6) row(2))
    ytitle("Exclusion Error")
    xtitle("Year")
    xlabel(2019(1)2021)
    ylabel(20(5)50)
    name(A, replace)
;
#delimit cr
graph export "FA6_performance_a_gainloss_OLS_jobpred_dep_urb.png", as(png) replace width(1200)
graph export "FA6_performance_a_gainloss_OLS_jobpred_dep_urb.pdf", as(pdf) replace 

#delimit ;
tw (connected inclusion_err year if type_m ==  "Baseline", 
        mcolor(red%80) msym(S) lcolor(red%80))
        (rcap inclusion_err_uci inclusion_err_lci  year if type_m ==  "Baseline", 
        lcolor(red%80))
    (connected inclusion_err year if type_m ==  "job_pred_rob", 
        mcolor(ltblue) msym(0) lpattern(-) lcolor(ltblue))
        (rcap inclusion_err_uci inclusion_err_lci year if type_m ==  "job_pred_rob", 
        lcolor(ltblue) lpattern(-) )
    (connected inclusion_err year if model_n ==  6 & type_m ==  "OLS" , 
        mcolor(black%80) msym(D) lcolor(black%80)) 
            (rcap inclusion_err_uci inclusion_err_lci year if model_n ==  6 & type_m ==  "OLS" , 
        mcolor(black%80) msym(D) lcolor(black%80)) 
        ,
    legend(order(1 "Benchmark PMT"		 			
            3 "Dynamic (Region urban/rural work trends)"  					
            5 "Dynamic"	) pos(6) row(2))
    ytitle("Inclusion Error")
    xtitle("Year")
    xlabel(2019(1)2021)
    ylabel(20(5)50)
    name(B, replace)
;

#delimit cr
graph export "FA6_performance_b_gainloss_OLS_jobpred_dep_urb.png", as(png) replace width(1200)
graph export "FA6_performance_b_gainloss_OLS_jobpred_dep_urb.pdf", as(pdf) replace 

#delimit ;
tw (connected crra year if type_m ==  "Baseline", 
        mcolor(red%80) msym(S) lcolor(red%80))
        (rcap crra_uci crra_lci year if type_m ==  "Baseline", 
        lcolor(red%80))
    (connected crra year if type_m ==  "job_pred_rob", 
        mcolor(ltblue) msym(0) lpattern(-) lcolor(ltblue))
            (rcap crra_uci crra_lci year if type_m ==  "job_pred_rob", 
            lcolor(ltblue))	
    (connected crra year if model_n ==  6 & type_m ==  "OLS" , 
        mcolor(black%80) msym(D) lcolor(black%80)) 
            (rcap crra_uci crra_lci year if model_n ==  6 & type_m ==  "OLS" , 
        lcolor(black%80))	
        ,
    legend(order(1 "Benchmark PMT"		 			
            3 "Dynamic (Region urban/rural work trends)"  					
            5 "Dynamic"	) pos(6) row(2))
    ytitle("Social Welfare (CRRA: {&rho} = 3)")
    xtitle("Year")
    xlabel(2019(1)2021)
    name(C, replace)
;
#delimit cr
graph export "FA6_performance_c_gainloss_OLS_jobpred_dep_urb.png", as(png) replace width(1200)
graph export "FA6_performance_c_gainloss_OLS_jobpred_dep_urb.pdf", as(pdf) replace 

#delimit ;
tw (connected hh_benefits year if type_m ==  "Baseline", 
        mcolor(red%80) msym(S) lcolor(red%80))
    (rcap hh_benefits_uci hh_benefits_lci year if type_m ==  "Baseline", 
            lcolor(red%80))	
    (connected hh_benefits year if type_m ==  "job_pred_rob", 
        mcolor(ltblue) msym(0) lpattern(-) lcolor(ltblue))
            (rcap hh_benefits_uci hh_benefits_lci year if type_m ==  "job_pred_rob", 
        lcolor(ltblue))	
    (connected hh_benefits year if model_n ==  6 & type_m ==  "OLS" , 
        mcolor(black%80) msym(D) lcolor(black%80))
            (rcap hh_benefits_uci hh_benefits_lci year if model_n ==  6 & type_m ==  "OLS" , 
        lcolor(black%80))	
        ,
    legend(order(1 "Benchmark PMT"		 			
            3 "Dynamic (Region urban/rural work trends)"  					
            5 "Dynamic"	) pos(6) row(2))
    ytitle("Per-household Transfer ($ USD PPP)")
    xtitle("Year")
    xlabel(2019(1)2021)
    name(D, replace)
;
#delimit cr
graph export "FA6_performance_d_gainloss_OLS_jobpred_dep_urb.png", as(png) replace width(1200)
graph export "FA6_performance_d_gainloss_OLS_jobpred_dep_urb.pdf", as(pdf) replace 
